{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# More JOIN operations\n",
    "\n",
    "![rel](https://sqlzoo.net/w/images/1/10/Movie-er.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading spark-stubs, spark-hive\n",
      "Adding Hive conf dir /opt/hive/conf to classpath\n",
      "Creating SparkSession\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "SLF4J: No SLF4J providers were found.\n",
      "SLF4J: Defaulting to no-operation (NOP) logger implementation\n",
      "SLF4J: See https://www.slf4j.org/codes.html#noProviders for further details.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<a target=\"_blank\" href=\"http://jupyter:4040\">Spark UI</a>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$                                  \n",
       "\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions._\n",
       "\n",
       "\u001b[39m\n",
       "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@21af06b6"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import $ivy.`org.apache.spark::spark-sql:3.4.0`\n",
    "\n",
    "import org.apache.log4j.{Level, Logger}\n",
    "Logger.getLogger(\"org\").setLevel(Level.OFF)\n",
    "\n",
    "import org.apache.spark._\n",
    "import org.apache.spark.sql._\n",
    "import org.apache.spark.sql.functions._\n",
    "\n",
    "val spark = {\n",
    "    NotebookSparkSession.builder()\n",
    "    .progress(false)\n",
    "    .appName(\"app07\")\n",
    "    // .master(\"spark://192.168.31.31:7077\")\n",
    "    .master(\"local[*]\")\n",
    "    .config(\"spark.sql.warehouse.dir\", \n",
    "            \"hdfs://192.168.31.31:9000/user/hive/warehouse\") \n",
    "    .config(\"spark.cores.max\", \"4\") \n",
    "    .config(\"spark.executor.instances\", \"1\") \n",
    "    .config(\"spark.executor.cores\", \"2\") \n",
    "    .config(\"spark.executor.memory\", \"10g\") \n",
    "    .config(\"spark.shuffle.service.enabled\", \"false\") \n",
    "    .config(\"spark.dynamicAllocation.enabled\", \"false\") \n",
    "    .config(\"spark.sql.catalogImplementation\", \"hive\")\n",
    "    .config(\"spark.sql.repl.eagerEval.enabled\", \"true\")\n",
    "    .config(\"spark.driver.allowMultipleContexts\", \"true\")\n",
    "    .getOrCreate()\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n",
       "\u001b[39m\n",
       "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m\n",
       "\u001b[36mhiveCxt\u001b[39m: \u001b[32msql\u001b[39m.\u001b[32mhive\u001b[39m.\u001b[32mHiveContext\u001b[39m = org.apache.spark.sql.hive.HiveContext@5545b746"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import spark.implicits._\n",
    "def sc = spark.sparkContext\n",
    "val hiveCxt = new org.apache.spark.sql.hive.HiveContext(sc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defined \u001b[32mclass\u001b[39m \u001b[36mRichDF\u001b[39m"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "// Credit to Aivean\n",
    "implicit class RichDF(val ds:DataFrame) {\n",
    "    def showHTML(limit: Int = 50, truncate: Int = 100) = {\n",
    "        import xml.Utility.escape\n",
    "        val data = ds.take(limit)\n",
    "        val header = ds.schema.fieldNames.toSeq        \n",
    "        val rows: Seq[Seq[String]] = data.map { row =>\n",
    "          row.toSeq.map {cell =>\n",
    "            val str = cell match {\n",
    "              case null => \"null\"\n",
    "              case binary: Array[Byte] => binary.map(\"%02X\".format(_)).mkString(\"[\", \" \", \"]\")\n",
    "              case array: Array[_] => array.mkString(\"[\", \", \", \"]\")\n",
    "              case seq: Seq[_] => seq.mkString(\"[\", \", \", \"]\")\n",
    "              case _ => cell.toString\n",
    "            }\n",
    "            if (truncate > 0 && str.length > truncate) {\n",
    "              // do not show ellipses for strings shorter than 4 characters.\n",
    "              if (truncate < 4) str.substring(0, truncate)\n",
    "              else str.substring(0, truncate - 3) + \"...\"\n",
    "            } else {\n",
    "              str\n",
    "            }\n",
    "          }: Seq[String]\n",
    "        }\n",
    "    publish.html(s\"\"\" <table>\n",
    "                <tr>\n",
    "                 ${header.map(h => s\"<th>${escape(h)}</th>\").mkString}\n",
    "                </tr>\n",
    "                ${rows.map {row =>\n",
    "                  s\"<tr>${row.map{c => s\"<td>${escape(c)}</td>\" }.mkString}</tr>\"\n",
    "                }.mkString}\n",
    "            </table>\n",
    "        \"\"\")\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[36mmovie\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: int, title: string ... 4 more fields]\n",
       "\u001b[36mactor\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: int, name: string]\n",
       "\u001b[36mcasting\u001b[39m: \u001b[32mDataFrame\u001b[39m = [movieid: int, actorid: int ... 1 more field]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val movie = hiveCxt.table(\"sqlzoo.movie\")\n",
    "val actor = hiveCxt.table(\"sqlzoo.actor\")\n",
    "val casting = hiveCxt.table(\"sqlzoo.casting\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. 1962 movies\n",
    "\n",
    "List the films where the **yr** is 1962 [Show **id, title**]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>id</th><th>title</th>\n",
       "                </tr>\n",
       "                <tr><td>10212</td><td>A Kind of Loving</td></tr><tr><td>10329</td><td>A Symposium on Popular Songs</td></tr><tr><td>10347</td><td>A Very Private Affair (Vie PrivÃ©e)</td></tr><tr><td>10648</td><td>An Autumn Afternoon</td></tr><tr><td>10868</td><td>Atraco a las tres</td></tr><tr><td>11006</td><td>Barabbas</td></tr><tr><td>11053</td><td>Battle Beyond the Sun (ÐÐµÐ±Ð¾ Ð·Ð¾Ð²ÐµÑ‚)</td></tr><tr><td>11199</td><td>Big and Little Wong Tin Bar</td></tr><tr><td>11230</td><td>Billy Budd</td></tr><tr><td>11234</td><td>Billy Rose's Jumbo</td></tr><tr><td>11242</td><td>Birdman of Alcatraz</td></tr><tr><td>11373</td><td>Boccaccio '70</td></tr><tr><td>11391</td><td>Bon Voyage!</td></tr><tr><td>11439</td><td>Boys' Night Out</td></tr><tr><td>11692</td><td>Cape Fear</td></tr><tr><td>11735</td><td>Carnival of Souls</td></tr><tr><td>11753</td><td>Carry On Cruising</td></tr><tr><td>12368</td><td>David and Lisa</td></tr><tr><td>12384</td><td>Days of Wine and Roses</td></tr><tr><td>12710</td><td>Dr. No</td></tr><tr><td>12817</td><td>L'Eclisse</td></tr><tr><td>12967</td><td>Tutti a casa</td></tr><tr><td>12992</td><td>Experiment in Terror</td></tr><tr><td>13010</td><td>Eyes Without a Face</td></tr><tr><td>13484</td><td>Gay Purr-ee</td></tr><tr><td>13534</td><td>Gigot</td></tr><tr><td>13641</td><td>Gorath</td></tr><tr><td>13727</td><td>Gypsy</td></tr><tr><td>13741</td><td>Half Ticket</td></tr><tr><td>13798</td><td>Harakiri</td></tr><tr><td>14317</td><td>In Search of the Castaways</td></tr><tr><td>14454</td><td>It's Only Money</td></tr><tr><td>14550</td><td>Jigsaw</td></tr><tr><td>14718</td><td>Kid Galahad</td></tr><tr><td>14860</td><td>La commare secca</td></tr><tr><td>14873</td><td>La notte</td></tr><tr><td>14972</td><td>Lawrence of Arabia</td></tr><tr><td>15088</td><td>Life for Ruth</td></tr><tr><td>15173</td><td>Lolita</td></tr><tr><td>15182</td><td>Long Day's Journey into Night</td></tr><tr><td>15247</td><td>Love at Twenty</td></tr><tr><td>15297</td><td>Lycanthropus</td></tr><tr><td>15397</td><td>Mamma Roma</td></tr><tr><td>15564</td><td>Merrill's Marauders</td></tr><tr><td>15752</td><td>Mother Joan of the Angels</td></tr><tr><td>15779</td><td>Mr. Hobbs Takes a Vacation</td></tr><tr><td>15840</td><td>Mutiny on the Bounty</td></tr><tr><td>16203</td><td>On the Beat</td></tr><tr><td>16295</td><td>Os Cafajestes</td></tr><tr><td>16367</td><td>Panic in Year Zero!</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "movie.filter(movie(\"yr\")===1962).select(\"id\", \"title\").showHTML()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. When was Citizen Kane released?\n",
    "\n",
    "Give year of 'Citizen Kane'."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>yr</th>\n",
       "                </tr>\n",
       "                <tr><td>1941</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "movie.filter(movie(\"title\")===\"Citizen Kane\").select(\"yr\").showHTML()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Star Trek movies\n",
    "\n",
    "List all of the Star Trek movies, include the **id**, **title** and **yr** (all of these movies include the words Star Trek in the title). Order results by year."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>id</th><th>title</th><th>yr</th>\n",
       "                </tr>\n",
       "                <tr><td>17770</td><td>Star Trek: First Contact</td><td>1996</td></tr><tr><td>17771</td><td>Star Trek: Insurrection</td><td>1998</td></tr><tr><td>17772</td><td>Star Trek: The Motion Picture</td><td>1979</td></tr><tr><td>17773</td><td>Star Trek</td><td>2009</td></tr><tr><td>17774</td><td>Star Trek Generations</td><td>1994</td></tr><tr><td>17775</td><td>Star Trek II: The Wrath of Khan</td><td>1982</td></tr><tr><td>17776</td><td>Star Trek III: The Search for Spock</td><td>1984</td></tr><tr><td>17777</td><td>Star Trek IV: The Voyage Home</td><td>1986</td></tr><tr><td>17778</td><td>Star Trek Nemesis</td><td>2002</td></tr><tr><td>17779</td><td>Star Trek V: The Final Frontier</td><td>1989</td></tr><tr><td>17780</td><td>Star Trek VI: The Undiscovered Country</td><td>1991</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(movie.filter(lower(movie(\"title\")).contains(\"star trek\"))\n",
    " .select(\"id\", \"title\", \"yr\").showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. id for actor Glenn Close\n",
    "\n",
    "What **id** number does the actor 'Glenn Close' have?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>id</th>\n",
       "                </tr>\n",
       "                <tr><td>140</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "actor.filter(actor(\"name\")===\"Glenn Close\").select(\"id\").showHTML()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. id for Casablanca\n",
    "\n",
    "What is the **id** of the film 'Casablanca'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>id</th>\n",
       "                </tr>\n",
       "                <tr><td>11768</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "movie.filter(movie(\"title\")===\"Casablanca\").select(\"id\").showHTML()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Cast list for Casablanca\n",
    "\n",
    "Obtain the cast list for 'Casablanca'.\n",
    "\n",
    "> _what is a cast list?_  \n",
    "> The cast list is the names of the actors who were in the movie.\n",
    "\n",
    "Use **movieid=11768**, (or whatever value you got from the previous question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>name</th>\n",
       "                </tr>\n",
       "                <tr><td>Peter Lorre</td></tr><tr><td>John Qualen</td></tr><tr><td>Madeleine LeBeau</td></tr><tr><td>Jack Benny</td></tr><tr><td>Dan Seymour</td></tr><tr><td>Norma Varden</td></tr><tr><td>Ingrid Bergman</td></tr><tr><td>Conrad Veidt</td></tr><tr><td>Leon Belasco</td></tr><tr><td>Humphrey Bogart</td></tr><tr><td>Sydney Greenstreet</td></tr><tr><td>Leonid Kinskey</td></tr><tr><td>Wolfgang Zilzer</td></tr><tr><td>Claude Rains</td></tr><tr><td>Curt Bois</td></tr><tr><td>Leo White</td></tr><tr><td>Ludwig StÃ¶ssel</td></tr><tr><td>Marcel Dalio</td></tr><tr><td>Paul Henreid</td></tr><tr><td>Joy Page</td></tr><tr><td>S. Z. Sakall</td></tr><tr><td>Dooley Wilson</td></tr><tr><td>William Edmunds</td></tr><tr><td>Gregory Gaye</td></tr><tr><td>Torben Meyer</td></tr><tr><td>Georges Renavent</td></tr><tr><td>Jean Del Val</td></tr><tr><td>Louis V. Arco</td></tr><tr><td>Trude Berliner</td></tr><tr><td>Ilka GrÃ¼nig</td></tr><tr><td>Richard Ryen</td></tr><tr><td>Hans Twardowski</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(casting.join(actor, casting(\"actorid\")===actor(\"id\"))\n",
    " .filter($\"movieid\"===11768)\n",
    " .select(\"name\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Alien cast list\n",
    "\n",
    "Obtain the cast list for the film 'Alien'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>name</th>\n",
       "                </tr>\n",
       "                <tr><td>John Hurt</td></tr><tr><td>Sigourney Weaver</td></tr><tr><td>Yaphet Kotto</td></tr><tr><td>Harry Dean Stanton</td></tr><tr><td>Ian Holm</td></tr><tr><td>Tom Skerritt</td></tr><tr><td>Veronica Cartwright</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(movie.join(casting, movie(\"id\")===casting(\"movieid\"), joinType=\"right\")\n",
    " .join(actor, col(\"actorid\")===actor(\"id\"), joinType=\"left\")\n",
    " .filter($\"title\"===\"Alien\")\n",
    " .select(\"name\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Harrison Ford movies\n",
    "\n",
    "List the films in which 'Harrison Ford' has appeared"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>title</th>\n",
       "                </tr>\n",
       "                <tr><td>A Hundred and One Nights</td></tr><tr><td>Air Force One</td></tr><tr><td>American Graffiti</td></tr><tr><td>Apocalypse Now</td></tr><tr><td>Clear and Present Danger</td></tr><tr><td>Cowboys &amp; Aliens</td></tr><tr><td>Crossing Over</td></tr><tr><td>Dead Heat on a Merry-Go-Round</td></tr><tr><td>Extraordinary Measures</td></tr><tr><td>Firewall</td></tr><tr><td>Force 10 From Navarone</td></tr><tr><td>Hanover Street</td></tr><tr><td>Hawthorne of the U.S.A.</td></tr><tr><td>Hollywood Homicide</td></tr><tr><td>Indiana Jones and the Kingdom of the Crystal Skull</td></tr><tr><td>Indiana Jones and the Last Crusade</td></tr><tr><td>Indiana Jones and the Temple of Doom</td></tr><tr><td>Jimmy Hollywood</td></tr><tr><td>K-19: The Widowmaker</td></tr><tr><td>More American Graffiti</td></tr><tr><td>Morning Glory</td></tr><tr><td>Patriot Games</td></tr><tr><td>Presumed Innocent</td></tr><tr><td>Raiders of the Lost Ark</td></tr><tr><td>Random Hearts</td></tr><tr><td>Regarding Henry</td></tr><tr><td>Sabrina</td></tr><tr><td>Sally of the Sawdust</td></tr><tr><td>Shadows</td></tr><tr><td>Six Days Seven Nights</td></tr><tr><td>Smilin' Through</td></tr><tr><td>Star Wars Episode IV: A New Hope</td></tr><tr><td>Star Wars Episode V: The Empire Strikes Back</td></tr><tr><td>Star Wars Episode VI: Return of the Jedi</td></tr><tr><td>The Conversation</td></tr><tr><td>The Devil's Own</td></tr><tr><td>The Fugitive</td></tr><tr><td>The Mosquito Coast</td></tr><tr><td>null</td></tr><tr><td>What Lies Beneath</td></tr><tr><td>Witness</td></tr><tr><td>Working Girl</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(movie.join(casting, movie(\"id\")===casting(\"movieid\"), joinType=\"right\")\n",
    " .join(actor, col(\"actorid\")===actor(\"id\"), joinType=\"left\")\n",
    " .filter($\"name\"===\"Harrison Ford\")\n",
    " .select(\"title\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9. Harrison Ford as a supporting actor\n",
    "\n",
    "List the films where 'Harrison Ford' has appeared - but not in the starring role. [Note: the ord field of casting gives the position of the actor. If ord=1 then this actor is in the starring role]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>title</th>\n",
       "                </tr>\n",
       "                <tr><td>A Hundred and One Nights</td></tr><tr><td>American Graffiti</td></tr><tr><td>Apocalypse Now</td></tr><tr><td>Cowboys &amp; Aliens</td></tr><tr><td>Dead Heat on a Merry-Go-Round</td></tr><tr><td>Extraordinary Measures</td></tr><tr><td>Force 10 From Navarone</td></tr><tr><td>Hawthorne of the U.S.A.</td></tr><tr><td>Jimmy Hollywood</td></tr><tr><td>More American Graffiti</td></tr><tr><td>Morning Glory</td></tr><tr><td>Sally of the Sawdust</td></tr><tr><td>Shadows</td></tr><tr><td>Smilin' Through</td></tr><tr><td>Star Wars Episode IV: A New Hope</td></tr><tr><td>Star Wars Episode V: The Empire Strikes Back</td></tr><tr><td>Star Wars Episode VI: Return of the Jedi</td></tr><tr><td>The Conversation</td></tr><tr><td>Working Girl</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(movie.join(casting, movie(\"id\")===casting(\"movieid\"), joinType=\"right\")\n",
    " .join(actor, col(\"actorid\")===actor(\"id\"), joinType=\"left\")\n",
    " .filter(($\"name\"===\"Harrison Ford\") && ($\"ord\">1))\n",
    " .select(\"title\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 10. Lead actors in 1962 movies\n",
    "\n",
    "List the films together with the leading star for all 1962 films."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>title</th><th>name</th>\n",
       "                </tr>\n",
       "                <tr><td>A Kind of Loving</td><td>Alan Bates</td></tr><tr><td>A Symposium on Popular Songs</td><td>Paul Frees</td></tr><tr><td>A Very Private Affair (Vie PrivÃ©e)</td><td>Brigitte Bardot</td></tr><tr><td>An Autumn Afternoon</td><td>Chishu Ryu</td></tr><tr><td>Atraco a las tres</td><td>JosÃ© Luis LÃ³pez VÃ¡zquez</td></tr><tr><td>Barabbas</td><td>Anthony Quinn</td></tr><tr><td>Battle Beyond the Sun (ÐÐµÐ±Ð¾ Ð·Ð¾Ð²ÐµÑ‚)</td><td>Aleksandr Shvorin</td></tr><tr><td>Big and Little Wong Tin Bar</td><td>Jackie Chan</td></tr><tr><td>Billy Budd</td><td>Terence Stamp</td></tr><tr><td>Billy Rose's Jumbo</td><td>Doris Day</td></tr><tr><td>Birdman of Alcatraz</td><td>Burt Lancaster</td></tr><tr><td>Boccaccio '70</td><td>Anita Ekberg</td></tr><tr><td>Bon Voyage!</td><td>Fred MacMurray</td></tr><tr><td>Boys' Night Out</td><td>Kim Novak</td></tr><tr><td>Cape Fear</td><td>Gregory Peck</td></tr><tr><td>Carnival of Souls</td><td>Candace Hilligoss</td></tr><tr><td>Carry On Cruising</td><td>Sid James</td></tr><tr><td>David and Lisa</td><td>Keir Dullea</td></tr><tr><td>Days of Wine and Roses</td><td>Jack Lemmon</td></tr><tr><td>Dr. No</td><td>Sean Connery</td></tr><tr><td>L'Eclisse</td><td>Alain Delon</td></tr><tr><td>Tutti a casa</td><td>Alberto Sordi</td></tr><tr><td>Experiment in Terror</td><td>Glenn Ford</td></tr><tr><td>Eyes Without a Face</td><td>Pierre Brasseur</td></tr><tr><td>Gay Purr-ee</td><td>Judy Garland</td></tr><tr><td>Gigot</td><td>Jackie Gleason</td></tr><tr><td>Gorath</td><td>Ryo Ikebe</td></tr><tr><td>Gypsy</td><td>Rosalind Russell</td></tr><tr><td>Half Ticket</td><td>Kishore Kumar</td></tr><tr><td>Harakiri</td><td>Tatsuya Nakadai</td></tr><tr><td>In Search of the Castaways</td><td>Hayley Mills</td></tr><tr><td>It's Only Money</td><td>Jerry Lewis</td></tr><tr><td>Jigsaw</td><td>Jack Warner</td></tr><tr><td>Kid Galahad</td><td>Elvis Presley</td></tr><tr><td>La commare secca</td><td>Marisa Solinas</td></tr><tr><td>La notte</td><td>Marcello Mastroianni</td></tr><tr><td>Life for Ruth</td><td>Michael Craig</td></tr><tr><td>Long Day's Journey into Night</td><td>Katharine Hepburn</td></tr><tr><td>Love at Twenty</td><td>Jean-Pierre LÃ©aud</td></tr><tr><td>Lycanthropus</td><td>Barbara Lass</td></tr><tr><td>Mamma Roma</td><td>Anna Magnani</td></tr><tr><td>Merrill's Marauders</td><td>null</td></tr><tr><td>Mother Joan of the Angels</td><td>Lucyna Winnicka</td></tr><tr><td>Mr. Hobbs Takes a Vacation</td><td>James Stewart</td></tr><tr><td>Mutiny on the Bounty</td><td>Marlon Brando</td></tr><tr><td>On the Beat</td><td>Norman Wisdom</td></tr><tr><td>Os Cafajestes</td><td>Daniel Filho</td></tr><tr><td>Panic in Year Zero!</td><td>Ray Milland</td></tr><tr><td>Period of Adjustment</td><td>Anthony Franciosa</td></tr><tr><td>Phaedra</td><td>Melina Mercouri</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(movie.join(casting, movie(\"id\")===casting(\"movieid\"), joinType=\"right\")\n",
    " .join(actor, col(\"actorid\")===actor(\"id\"), joinType=\"left\")\n",
    " .filter(($\"yr\"===1962) && ($\"ord\"===1))\n",
    " .select(\"title\", \"name\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 11. Busy years for Rock Hudson\n",
    "\n",
    "Which were the busiest years for 'Rock Hudson', show the year and the number of movies he made each year for any year in which he made more than 2 movies."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>yr</th><th>count</th>\n",
       "                </tr>\n",
       "                <tr><td>1961</td><td>3</td></tr><tr><td>1953</td><td>5</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(movie.join(casting, movie(\"id\")===casting(\"movieid\"), joinType=\"right\")\n",
    " .join(actor, col(\"actorid\")===actor(\"id\"), joinType=\"left\")\n",
    " .filter($\"name\"===\"Rock Hudson\")\n",
    " .select(\"yr\", \"title\")\n",
    " .groupBy(\"yr\")\n",
    " .count()\n",
    " .filter($\"count\" > 2)\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 12. Lead actor in Julie Andrews movies\n",
    "\n",
    "List the film title and the leading actor for all of the films 'Julie Andrews' played in.\n",
    "\n",
    "> _Did you get \"Little Miss Marker twice\"?_   \n",
    "> Julie Andrews starred in the 1980 remake of Little Miss Marker and not the original(1934).\n",
    ">\n",
    "> Title is not a unique field, create a table of IDs in your subquery"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>title</th><th>name</th>\n",
       "                </tr>\n",
       "                <tr><td>10</td><td>Dudley Moore</td></tr><tr><td>Darling Lili</td><td>Julie Andrews</td></tr><tr><td>Despicable Me</td><td>Steve Carell</td></tr><tr><td>Duet for One</td><td>Julie Andrews</td></tr><tr><td>Hawaii</td><td>Julie Andrews</td></tr><tr><td>Little Miss Marker</td><td>Walter Matthau</td></tr><tr><td>Mary Poppins</td><td>Julie Andrews</td></tr><tr><td>Relative Values</td><td>Julie Andrews</td></tr><tr><td>Shrek the Third</td><td>Mike Myers</td></tr><tr><td>Star!</td><td>Julie Andrews</td></tr><tr><td>The Americanization of Emily</td><td>James Garner</td></tr><tr><td>The Pink Panther Strikes Again</td><td>Peter Sellers</td></tr><tr><td>The Princess Diaries</td><td>Anne Hathaway</td></tr><tr><td>The Princess Diaries 2: Royal Engagement</td><td>Anne Hathaway</td></tr><tr><td>null</td><td>Julie Andrews</td></tr><tr><td>The Tamarind Seed</td><td>Julie Andrews</td></tr><tr><td>Thoroughly Modern Millie</td><td>Julie Andrews</td></tr><tr><td>Tooth Fairy</td><td>Dwayne Johnson</td></tr><tr><td>Torn Curtain</td><td>Paul Newman</td></tr><tr><td>Victor Victoria</td><td>Julie Andrews</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[36mb\u001b[39m: \u001b[32mList\u001b[39m[\u001b[32mAny\u001b[39m] = \u001b[33mList\u001b[39m(\n",
       "  \u001b[32m10016\u001b[39m,\n",
       "  \u001b[32m12354\u001b[39m,\n",
       "  \u001b[32m12497\u001b[39m,\n",
       "  \u001b[32m12766\u001b[39m,\n",
       "  \u001b[32m13846\u001b[39m,\n",
       "  \u001b[32m15145\u001b[39m,\n",
       "  \u001b[32m15476\u001b[39m,\n",
       "  \u001b[32m16870\u001b[39m,\n",
       "  \u001b[32m17445\u001b[39m,\n",
       "  \u001b[32m17765\u001b[39m,\n",
       "  \u001b[32m18270\u001b[39m,\n",
       "  \u001b[32m20136\u001b[39m,\n",
       "  \u001b[32m20136\u001b[39m,\n",
       "  \u001b[32m20180\u001b[39m,\n",
       "  \u001b[32m20181\u001b[39m,\n",
       "  \u001b[32m20509\u001b[39m,\n",
       "  \u001b[32m20627\u001b[39m,\n",
       "  \u001b[32m21023\u001b[39m,\n",
       "  \u001b[32m21154\u001b[39m,\n",
       "  \u001b[32m21171\u001b[39m,\n",
       "  \u001b[32m21483\u001b[39m\n",
       ")"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val b = (movie.join(casting, movie(\"id\")===casting(\"movieid\"), joinType=\"right\")\n",
    "         .join(actor, col(\"actorid\")===actor(\"id\"), joinType=\"left\")\n",
    "         .filter($\"name\"===\"Julie Andrews\")\n",
    "         .select(\"movieid\")\n",
    "         .collect()\n",
    "         .map(_(0))\n",
    "         .toList)\n",
    "(movie.join(casting, movie(\"id\")===casting(\"movieid\"), joinType=\"right\")\n",
    " .join(actor, col(\"actorid\")===actor(\"id\"), joinType=\"left\")\n",
    " .filter(($\"movieid\".isin(b: _*)) && ($\"ord\"===1))\n",
    " .select(\"title\", \"name\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 13. Actors with 15 leading roles\n",
    "\n",
    "Obtain a list, in alphabetical order, of actors who've had at least 15 **starring** roles."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>name</th>\n",
       "                </tr>\n",
       "                <tr><td>Adam Sandler</td></tr><tr><td>Al Pacino</td></tr><tr><td>Anthony Hopkins</td></tr><tr><td>Antonio Banderas</td></tr><tr><td>Arnold Schwarzenegger</td></tr><tr><td>Barbara Stanwyck</td></tr><tr><td>Basil Rathbone</td></tr><tr><td>Ben Affleck</td></tr><tr><td>Bette Davis</td></tr><tr><td>Betty Grable</td></tr><tr><td>Bing Crosby</td></tr><tr><td>Bruce Willis</td></tr><tr><td>Bud Abbott</td></tr><tr><td>Burt Lancaster</td></tr><tr><td>Burt Reynolds</td></tr><tr><td>Buster Keaton</td></tr><tr><td>Cary Grant</td></tr><tr><td>Charles Bronson</td></tr><tr><td>Charlton Heston</td></tr><tr><td>Clark Gable</td></tr><tr><td>Claudette Colbert</td></tr><tr><td>Clint Eastwood</td></tr><tr><td>Dean Martin</td></tr><tr><td>Dennis Quaid</td></tr><tr><td>Denzel Washington</td></tr><tr><td>Dirk Bogarde</td></tr><tr><td>Dustin Hoffman</td></tr><tr><td>Edward G. Robinson</td></tr><tr><td>Elvis Presley</td></tr><tr><td>Errol Flynn</td></tr><tr><td>Frank Sinatra</td></tr><tr><td>Fred Astaire</td></tr><tr><td>Fredric March</td></tr><tr><td>Gary Cooper</td></tr><tr><td>Gene Hackman</td></tr><tr><td>George Clooney</td></tr><tr><td>Glenn Ford</td></tr><tr><td>Gregory Peck</td></tr><tr><td>Greta Garbo</td></tr><tr><td>Harrison Ford</td></tr><tr><td>Henry Fonda</td></tr><tr><td>Humphrey Bogart</td></tr><tr><td>Ingrid Bergman</td></tr><tr><td>Jack Lemmon</td></tr><tr><td>Jack Nicholson</td></tr><tr><td>Jackie Chan</td></tr><tr><td>James Cagney</td></tr><tr><td>James Mason</td></tr><tr><td>James Stewart</td></tr><tr><td>Jane Fonda</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(movie.join(casting, movie(\"id\")===casting(\"movieid\"), joinType=\"right\")\n",
    " .join(actor, col(\"actorid\")===actor(\"id\"), joinType=\"left\")\n",
    " .filter($\"ord\"===1)\n",
    " .groupBy(\"actorid\", \"name\")\n",
    " .count()\n",
    " .filter($\"count\" >= 15)\n",
    " .filter(! isnull($\"name\"))\n",
    " .select(\"name\")\n",
    " .orderBy(\"name\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 14.\n",
    "List the films released in the year 1978 ordered by the number of actors in the cast, then by title."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>title</th><th>count</th>\n",
       "                </tr>\n",
       "                <tr><td>The Bad News Bears Go to Japan</td><td>50</td></tr><tr><td>The Swarm</td><td>37</td></tr><tr><td>Grease</td><td>28</td></tr><tr><td>American Hot Wax</td><td>27</td></tr><tr><td>The Boys from Brazil</td><td>26</td></tr><tr><td>Heaven Can Wait</td><td>25</td></tr><tr><td>Big Wednesday</td><td>21</td></tr><tr><td>A Night Full of Rain</td><td>19</td></tr><tr><td>A Wedding</td><td>19</td></tr><tr><td>Orchestra Rehearsal</td><td>19</td></tr><tr><td>The Cheap Detective</td><td>19</td></tr><tr><td>Go Tell the Spartans</td><td>18</td></tr><tr><td>Death on the Nile</td><td>17</td></tr><tr><td>Movie Movie</td><td>17</td></tr><tr><td>Superman</td><td>17</td></tr><tr><td>The Cat from Outer Space</td><td>17</td></tr><tr><td>The Driver</td><td>17</td></tr><tr><td>Blue Collar</td><td>16</td></tr><tr><td>Ice Castles</td><td>16</td></tr><tr><td>International Velvet</td><td>16</td></tr><tr><td>J.R.R. Tolkien's The Lord of the Rings</td><td>16</td></tr><tr><td>Alexandria... Why?</td><td>15</td></tr><tr><td>Bye Bye Monkey</td><td>15</td></tr><tr><td>Coming Home</td><td>15</td></tr><tr><td>David</td><td>15</td></tr><tr><td>Gray Lady Down</td><td>15</td></tr><tr><td>Occupation in 26 Pictures</td><td>15</td></tr><tr><td>Revenge of the Pink Panther</td><td>15</td></tr><tr><td>The Brink's Job</td><td>15</td></tr><tr><td>The Chant of Jimmie Blacksmith</td><td>15</td></tr><tr><td>The Water Babies</td><td>15</td></tr><tr><td>Violette NoziÃ¨re</td><td>15</td></tr><tr><td>Who'll Stop The Rain</td><td>15</td></tr><tr><td>Without Anesthesia</td><td>15</td></tr><tr><td>Bread and Chocolate</td><td>14</td></tr><tr><td>Closed Circuit</td><td>14</td></tr><tr><td>Damien: Omen II</td><td>14</td></tr><tr><td>I Wanna Hold Your Hand</td><td>14</td></tr><tr><td>The Empire of Passion</td><td>14</td></tr><tr><td>Almost Summer</td><td>13</td></tr><tr><td>An Unmarried Woman</td><td>13</td></tr><tr><td>Foul Play</td><td>13</td></tr><tr><td>Goin' South</td><td>13</td></tr><tr><td>The Left-Handed Woman</td><td>13</td></tr><tr><td>California Suite</td><td>12</td></tr><tr><td>Force 10 From Navarone</td><td>12</td></tr><tr><td>In Praise of Older Women</td><td>12</td></tr><tr><td>Jaws 2</td><td>12</td></tr><tr><td>Midnight Express</td><td>12</td></tr><tr><td>Piranha</td><td>12</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(movie.join(casting, movie(\"id\")===casting(\"movieid\"), joinType=\"right\")\n",
    " .join(actor, col(\"actorid\")===actor(\"id\"), joinType=\"left\")\n",
    " .filter($\"yr\"===1978)\n",
    " .groupBy(\"title\", \"movieid\")\n",
    " .count()\n",
    " .select(\"title\", \"count\")\n",
    " .orderBy($\"count\".desc, $\"title\".asc)\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 15.\n",
    "\n",
    "List all the people who have worked with 'Art Garfunkel'."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>name</th>\n",
       "                </tr>\n",
       "                <tr><td>Beverly Johnson</td></tr><tr><td>Bill Paxton</td></tr><tr><td>Breckin Meyer</td></tr><tr><td>Bruce Jay Friedman</td></tr><tr><td>Cecilie Thomsen</td></tr><tr><td>Cindy Crawford</td></tr><tr><td>Donald Trump</td></tr><tr><td>Elio Fiorucci</td></tr><tr><td>Ellen Albertini Dow</td></tr><tr><td>Frederique van der Wal</td></tr><tr><td>Georgina Grenville</td></tr><tr><td>Harris Yulin</td></tr><tr><td>Heather Matarazzo</td></tr><tr><td>Heidi Klum</td></tr><tr><td>Julian Sands</td></tr><tr><td>Kurtwood Smith</td></tr><tr><td>Lauren Hutton</td></tr><tr><td>Lorna Luft</td></tr><tr><td>Mark Ruffalo</td></tr><tr><td>Michael York</td></tr><tr><td>Mike Myers</td></tr><tr><td>Neve Campbell</td></tr><tr><td>Peter Bogdanovich</td></tr><tr><td>Robert DoQui</td></tr><tr><td>Ron Jeremy</td></tr><tr><td>Ryan Phillippe</td></tr><tr><td>Salma Hayek</td></tr><tr><td>Sela Ward</td></tr><tr><td>Sherilyn Fenn</td></tr><tr><td>Sherry Stringfield</td></tr><tr><td>Sheryl Crow</td></tr><tr><td>Skipp Sudduth</td></tr><tr><td>Stars on 54</td></tr><tr><td>Thelma Houston</td></tr><tr><td>Valerie Perrine</td></tr><tr><td>Veronica Webb</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[36ma\u001b[39m: \u001b[32mDataFrame\u001b[39m = [movieid: int]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val a = (movie.join(casting, movie(\"id\")===casting(\"movieid\"), joinType=\"right\")\n",
    "         .join(actor, col(\"actorid\")===actor(\"id\"), joinType=\"left\")\n",
    "         .filter($\"name\"===\"Art Garfunkel\")\n",
    "         .select(\"movieid\"))\n",
    "(movie.join(casting, movie(\"id\")===casting(\"movieid\"), joinType=\"right\")\n",
    " .join(actor, col(\"actorid\")===actor(\"id\"), joinType=\"left\")\n",
    " .filter($\"name\" !== \"Art Garfunkel\")\n",
    " .join(a, Seq(\"movieid\"))\n",
    " .select(\"name\")\n",
    " .orderBy(\"name\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "spark.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Scala",
   "language": "scala",
   "name": "scala"
  },
  "language_info": {
   "codemirror_mode": "text/x-scala",
   "file_extension": ".sc",
   "mimetype": "text/x-scala",
   "name": "scala",
   "nbconvert_exporter": "script",
   "version": "2.12.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
